library(magrittr)
library(ggplot2)
# Parse every saved HTML page found directly under "pages/".
#
# `pattern` is a regular expression, not a shell glob: the previous "*.html"
# also matched names that merely contain "<any char>html" (for example
# "index.html.bak"). The anchored pattern keeps only files ending in ".html".
# `full.names = TRUE` returns the paths already prefixed with the directory.
pages <- list.files(path = "pages", pattern = "\\.html$", full.names = TRUE) %>%
  purrr::map(rvest::read_html)
# Build one row per review from the parsed pages.
#
# Each column is first extracted as a list holding one vector per page; the
# three list-columns are then unnested together, so the per-page vectors have
# to line up (one rating, one title and one date per review).
reviews <- tibble::tibble(
  # Text of the ".num" nodes. Entries containing "reviews" are discarded
  # (presumably the review-count summary -- confirm against the page markup),
  # the "Rating Score" label is stripped and the remainder is read as integer.
  rating = purrr::map(pages, function(page) {
    num_text <- rvest::html_text2(rvest::html_elements(page, ".num"))
    scores <- purrr::discard(num_text, stringr::str_detect(num_text, "reviews"))
    as.integer(stringr::str_remove(scores, "Rating Score"))
  }),
  # Text of the ".rvtit" nodes with curly quotation marks removed.
  text = purrr::map(pages, function(page) {
    titles <- rvest::html_text2(rvest::html_elements(page, ".rvtit"))
    stringr::str_remove_all(titles, "”|“")
  }),
  # Text of the ".auth" nodes: the last "- "-separated piece is parsed as a
  # month-day-year date.
  date = purrr::map(pages, function(page) {
    bylines <- rvest::html_text2(rvest::html_elements(page, ".auth"))
    lubridate::mdy(stringr::word(bylines, -1, sep = "- "))
  })
) %>%
  # `page` is the position of the source document within `pages`.
  tibble::rowid_to_column(var = "page") %>%
  tidyr::unnest(cols = c(rating, text, date)) %>%
  # `review_id` numbers the reviews sequentially across all pages.
  tibble::rowid_to_column(var = "review_id")
# Bing sentiment lexicon, joined on its `word` column below.
bing_sentiments <- tidytext::get_sentiments("bing")

# One row per sentiment-bearing word of each review text:
#   1. split `text` into one token per row (column `word`),
#   2. drop the stop words of the "snowball" lexicon,
#   3. keep only words present in the Bing lexicon (adds its columns),
#   4. add `month`, the review date floored to the start of its month.
tidy_reviews <- reviews %>%
  tidytext::unnest_tokens(output = word, input = text) %>%
  dplyr::anti_join(
    dplyr::filter(tidytext::stop_words, lexicon == "snowball"),
    by = "word"
  ) %>%
  dplyr::inner_join(bing_sentiments, by = "word") %>%
  dplyr::mutate(month = lubridate::floor_date(date, unit = "months"))
# Monthly summary of the sentiment tokens: median rating, number of positive
# and negative words, and their ratio (`polarity`).
#
# NOTE(review): `tidy_reviews` holds one row per sentiment word, so the median
# rating is taken over tokens, not over reviews -- a review contributes once
# per matched word. Confirm this weighting is intended.
# NOTE(review): a month without negative words yields `polarity = Inf`.
review_sentiments <- tidy_reviews %>%
  dplyr::group_by(month) %>%
  dplyr::summarise(
    median_rating = median(rating),
    positive = sum(sentiment == "positive"),
    negative = sum(sentiment == "negative"),
    polarity = positive / negative
  )
# Plot the most recent twelve months of the monthly median rating (dark blue)
# against the positive/negative word ratio rescaled to the 1-5 range (orange).
#
# The monthly summary is plotted directly. Joining it back onto the token-level
# `tidy_reviews` table only repeated each month's values once per token, so the
# same two lines were drawn from a large number of duplicated points.
review_sentiments %>%
  # Rescale across all months, before windowing, so the 1-5 mapping does not
  # depend on which months end up in the plot.
  dplyr::mutate(polarity_rescaled = scales::rescale(polarity, to = c(1, 5))) %>%
  # Keep months strictly later than twelve months before the latest month.
  # The period is built explicitly through lubridate instead of relying on
  # base `months()` dispatching to a method lubridate registers when loaded.
  dplyr::filter(
    month > max(month, na.rm = TRUE) - lubridate::period(12, units = "month")
  ) %>%
  ggplot() +
  geom_line(
    aes(x = month, y = polarity_rescaled),
    colour = "#e28743",
    linewidth = 1.2,
    lineend = "round"
  ) +
  geom_line(
    aes(x = month, y = median_rating),
    colour = "#063970",
    linewidth = 1.2,
    lineend = "round"
  ) +
  theme_minimal() +
  scale_y_continuous(name = "Score", limits = c(1, 5)) +
  scale_x_date(
    name = "Month",
    date_breaks = "1 months",
    date_labels = "%b %y",
    minor_breaks = NULL
  ) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    axis.title.x = element_text(margin = margin(t = 10)),
    axis.title.y = element_text(margin = margin(r = 10))
  ) +
  # Curved arrow from (2022-11-01, 3.5) to (2022-10-10, 3); the dates are
  # hard-coded and do not move with the twelve-month window.
  annotate(
    geom = "curve",
    x = as.Date("2022-11-01"),
    y = 3.5,
    xend = as.Date("2022-10-10"),
    yend = 3,
    curvature = -.3,
    linewidth = 1.2,
    arrow = grid::arrow(
      angle = 20,
      type = "closed",
      length = grid::unit(2, "mm")
    )
  ) +
  # Image inset positioned relative to the full plot area
  # (left = 0.7, bottom = 0.7, right = 0.95, top = 0.9).
  patchwork::inset_element(
    png::readPNG(file.path("images", "musk_head.png"), native = TRUE),
    0.7, 0.7, 0.95, 0.9,
    align_to = "full"
  ) +
  # Added after the inset, as in the patchwork raster-inset example, so it is
  # presumably meant to style the inset rather than the main plot -- confirm.
  theme_void()